clear all
set more off
cap log close

********************************************************************************
***** Project: The Short and Long Term Effects of In-Person Performance Feedback
********************************************************************************
***** A. R. Soetevent & G. J. Romensen
********************************************************************************
***** Basic regressions
********************************************************************************
*global filepath "C:\JPEMicReplication"
*global paperpath "$filepath\TablesGraphs"

local abcd "acceleratie rem bochten fueleconomyLpKM"
log using "$filepath/Logs/X06BasicRegressions.log", replace

/*** Notes ***/
* No fuel economy observations postfeedback in urban area.

use "$filepath\DEPO\DataMainAnalysisDEPO.dta"

gen byte ZH = 0 
replace ZH = 1 if regio == "ZH"
gen byte FR = 0 
replace FR = 1 if regio == "FR"

** Drop data from ZH ** 
keep if FR == 1
*  Drop months with imperfect tracking by coaches 
drop if datum>date("30-4-2016", "DMY")

** Newly defined variables [geplande_ritafstand is in meters, for this reason *1000]
gen aantal_haltespKM = (1000*aantal_haltes)/geplande_ritafstand
label variable aantal_haltespKM "Nr. stops per km."

*hist aantal_haltespKM, by(stadsrit) name(noStopsStadStreek, replace)
*graph export "$paperpath/noStopsStadStreek.png", replace name(noStopsStadStreek)

****************************************
*** A. Create global list of covariates
****************************************
* outcome variables
global abcd "acceleratie rem bochten fueleconomyLpKM"

global covMaand "maand_1 maand_2 maand_3 maand_4 maand_6 maand_7 maand_8 maand_9 maand_10 maand_11 maand_12" 
* (May = default)

global covBusType "vdl10 vdl14  iris10 iris10cng iris12 iris12cng intouro"
* (vdl12 = default - more than 50 per cent of observations)

global covWeatherExt "Temp6_3C Temp3_0C Temp0_3C Temp3_6C Temp9_12C Temp12_15C Temp15_18C Temp18_21C Temp21_24C Temp24_27C Rain5_10mm Rain10_15mm Rain15_20mm Rain20_30mm Rain30_40mm Wind0_2ms Wind4_6ms Wind6_8ms Wind8_10ms Wind10_20ms"  
*(Defaults: Wind2_4ms, Temp6_9C, Rain0_5mm)
global covWeather "Temp10_5C Temp5_10C Temp15_20C Temp20_25 Rain0_5mm Rain5_50mm Wind0_3ms Wind5_8ms Wind8_20ms"  
*(Defaults: Wind4_5ms, Temp10_15C, Rain0_0mm)


gen triplength=geplande_ritafstand/1000
global covEnvironm "ochtendspits avondspits uitleenrit aantal_haltespKM stadsrit triplength"
** Note: this is other covEnvironm than used in other do files: "geplande_ritafstand aantal_haltes" have been replaced by "aantal_haltespKM"

global covPassengers "lnovcheckins ovcheckinsmissing"

global covEndogenous "punctuality"

********************************************************************************
*** B. Time trend in ABC + Fuel economy correcting for time-variant variables
********************************************************************************
set matsize 10000


/*** Determine the set of observations used for the analysis ***/
gen byte regobsfuel=1
replace regobsfuel=0 if geplande_ritafstand==. | lnovcheckins==. | punctuality==. | aantal_haltespKM==. | dep_fueleconomyLpKM==.

gen byte regobsabc=1
replace regobsabc=0  if geplande_ritafstand==. | lnovcheckins==. | punctuality==. | aantal_haltespKM==. | dep_acceleratie==. | dep_bochten==. | dep_rem==.
xtset chauf_nr_rug 

** No. of observation per driver per outcome dimension pre-coaching
sort chauf_nr_rug 
by chauf_nr_rug: egen postcoachingobs = total(postcoaching == 1)
gen regobsabcprecoaching = regobsabc * (1-postcoaching)
by chauf_nr_rug: egen NobsPerDriverdep_abc = total(regobsabcprecoaching == 1)
gen regobsfuelprecoaching = regobsfuel * (1-postcoaching)
by chauf_nr_rug: egen NobsPerDriverdep_fuel = total(regobsfuelprecoaching == 1)
drop regobsabcprecoaching regobsfuelprecoaching


local i = 0
gen byte selectie=0
foreach var in $abcd {
	replace selectie = 0 
local i = `i' + 1
	if `i'==1 { 
		local ynaam  "Acceleration" 
		replace selectie=regobsabc  if (postcoaching == 0 & NobsPerDriverdep_abc >= 25)
		
	}
	else if `i'==2 { 
		local ynaam  "Braking" 
		replace selectie=regobsabc  if (postcoaching == 0 & NobsPerDriverdep_abc >= 25)
	}
	else if `i'==3 { 
		local ynaam  "Cornering" 
		replace selectie=regobsabc  if (postcoaching == 0 & NobsPerDriverdep_abc >= 25)
	}
	else if `i'==4 { 
		local ynaam  "Fuel economy" 
		replace selectie=regobsfuel  if (postcoaching == 0 & NobsPerDriverdep_fuel >= 25)
		* drop (irisbus) observations that do not have fuel economy outcomes.
	}
		
		
		di "Fixed effects regression: Determinants of `var'"

	** TABLES IN TEX ** 
	local verklvar "vdl10 vdl14 intouro ochtendspits avondspits uitleenrit aantal_haltespKM triplength stadsrit lnovcheckins punctuality _cons"
	label variable vdl10 "VDL 10m"
	label variable vdl14 "VDL 14m"
	label variable iris10 "Iris 10m"
	label variable iris10cng "Iris 10m cng"
	label variable iris12 "Iris 12m"
	label variable iris12cng "Iris 12m cng"
	label variable intouro "Intouro"
	label variable triplength "Trip length (in km.)"
	label variable ochtendspits "Rush hour 7-10am"
	label variable avondspits "Rush hour 4-7pm"
	label variable uitleenrit "Non-scheduled trip"
	label variable aantal_haltespKM "No. of stops per km."
	label variable stadsrit "Urban trip"
	label variable lnovcheckins  "Ln(No. of passengers)"
	label variable punctuality "Punctuality" 
	

*****************************************************************************
***  [Tables C.1-C.4: Determinants of Fuel Economy and ABC]
*****************************************************************************
		areg dep_`var' $covBusType 				if selectie == 1 , absorb(lijn_nr) cluster(chauf_nr_rug)
		estadd loc weer	    "No", replace
		estadd loc driverfe "No", replace
		estadd loc dayfe 	"No", replace
		estadd loc linefe "No", replace 
		estimates store mfe`var'0,  title(mfe`var'0)

		areg dep_`var' $covBusType $covWeather 	if selectie == 1 , absorb(lijn_nr) cluster(chauf_nr_rug)
		estadd loc weer	    "Yes", replace
		estadd loc driverfe "No", replace
		estadd loc dayfe 	"No", replace
		estadd loc linefe "No", replace 
		estimates store mfe`var'1,  title(mfe`var'1)
		
		areg dep_`var'  $covBusType $covEnvironm $covPassengers $covEndogenous $covWeather if selectie == 1, absorb(lijn_nr) cluster(chauf_nr_rug)
		estadd loc weer	    "Yes", replace
		estadd loc driverfe "No", replace
		estadd loc dayfe 	"No", replace
		estadd loc linefe "Yes", replace 
		estimates store mfe`var'2,  title(mfe`var'2)
		
		areg dep_`var' $covBusType $covEnvironm $covPassengers $covEndogenous i.lijn_nr   if selectie == 1, absorb(datum) cluster(chauf_nr_rug)
		estadd loc weer	    "No", replace
		estadd loc driverfe "No", replace
		estadd loc dayfe 	"Yes", replace
		estadd loc linefe "Yes", replace 
		estimates store mfe`var'3,  title(mfe`var'3)
		predict double residfe`var', residuals

		areg dep_`var' $covBusType $covEnvironm $covPassengers $covEndogenous i.lijn_nr ibn.chauf_nr_rug  if selectie == 1, absorb(datum) cluster(chauf_nr_rug)
		estadd loc weer	    "No", replace
		estadd loc driverfe "Yes", replace
		estadd loc dayfe 	"Yes", replace
		estadd loc linefe "Yes", replace 
		estimates store mfe`var'4,  title(mfe`var'4)
		estimates save "$paperpath/FIXEDEFFECTSindividualeffectsFR`var'", replace
		qui levelsof chauf_nr_rug if selectie == 1, local(g)
		gen fe`var' = .
		gen se`var' = .
		local fesum = 0
		local fesumN = 0 
		local ndrivers = r(N)	
			foreach x of local g{
					* Copy into the full FE variable
					* Note the regression was run with a constant term so we must add it to the FE
				qui replace fe`var' = _b[`x'.chauf_nr_rug] + _b[_cons]  if chauf_nr_rug == `x' 
				qui replace se`var' = _se[`x'.chauf_nr_rug] 			if chauf_nr_rug == `x' 
				local fesum = `fesum' + fe`var' 
				local fesumN = `fesumN' + 1
				*di "`fesum'"
			}	
		local feavg = `fesum'/`fesumN'
		qui summ dep_`var' if selectie == 1
			local rawmean = r(mean)
			display "raw mean value of outcome `var': `rawmean'"
			* generate  fe`var'meanv = grand average + fixed effect driver - average driver fixed effect
		gen fe`var'meanv = fe`var' - `feavg' + `rawmean'
		eststo clear 
		
		esttab mfe`var'0 mfe`var'1 mfe`var'2 mfe`var'3 mfe`var'4  using "$paperpath/TABfe`var'BasicReg.tex", replace f ///
		keep(`verklvar') ///
		label booktabs b(3) p(3) eqlabels(none) collabels(none) mlabels(none) ///
		starlevels($^{*}$ 0.10 $^{**}$ 0.05 $^{***}$ 0.01) ///
		cells(b(star fmt(%9.3f) ) se(par fmt(%9.3f))) ///
		stats(r2 N weer driverfe dayfe linefe, fmt(%9.3g)  labels( "R$^2$" "Number of trip-level observations" "Weather dummies" "Driver fixed effects" "Day fixed effects" "Route fixed effects")) ///
		varlabels(_cons Constant) ///
		prehead("\tabcolsep=0.25cm" "\begin{tabular}{l*{@M}{C{2.0cm}}}\hline\hline" "Dependent variable: &\multicolumn{5}{c}{\textbf{`ynaam'}} \\" "\cline{2-6}") posthead(\hline) ///
		prefoot(\hline) postfoot("\hline" "\end{tabular}")
	
		** Empirical Bayes adjusted fixed effects to account for measurement error, Chandra et al (AER, 2016)
		ebayes fe`var' se`var', absorb(rndlocid) gen(feEB`var') bee(att`var') var(`var'_var) uvar(`var'_uvar)
		
		        label var `var'_uvar "`var' / underlying var"
				label var `var'_var " `var' / underlying var (w/in-HRR)"
				label var  feEB`var' "`var' / EB-adj FE"
				

		* Input: 
		tabstat dep_`var'    if selectie == 1, by(chauf_nr_rug) statistics(mean sd n)
		tabstat residfe`var' if selectie == 1, by(chauf_nr_rug) statistics(mean sd n)

}


** Raw Correlations:
corr dep_acceleratie dep_rem dep_bochten dep_fueleconomyLpKM if selectie==1  

***********************************************************************************
***  [Table 2: Summary Statistics Variation in Productivity Metrics across Drivers]
***********************************************************************************

* create dupli-dummy which is 1 for one observation per driver and 0 otherwise.
sort chauf_nr_rug datum
by chauf_nr_rug: gen obshulp = _n
	gen byte dupli = 1
	replace dupli = 0 if obshulp == 1
	drop obshulp

foreach var in $abcd {
sum dep_`var' if selectie == 1
sum fe`var' if dupli == 0
sum fe`var'meanv if dupli == 0
}
	
* summary stats - quality
matrix summ_qual = J(7,4,.)
matrix colnames summ_qual = acceleratie rem bochten fuel
matrix rownames summ_qual = qual_avg qual_sd Dp90p10 coefofvar attenuation_mean attentuation_median ndrivers
	
local c = 1
foreach var in $abcd {

** Code ajusted from Chandra et al. (2016), summstats.do
* avg. productivity measures across drivers 
* Chandra et al. would do this with no adjustment driving conditions, using fe`var',  the avg for a productivity measure is for a driver with all zeroes for the driving condition adjustments 
* We instead use fe`var'meanv, which adds the difference between a driver's fe`var'  and the average fe`var' to the grand mean of the outcome variable in the selected period.
		di "OUTCOME VARIABLE: `var'"
		summ fe`var'meanv if dupli == 0
		matrix summ_qual[1,`c'] = r(mean)
		local ndrivers = r(N)	
		* underlying sd
		summ `var'_uvar if dupli == 0
		matrix summ_qual[2,`c'] = sqrt(r(mean))
		* room for improvement p10 to p90
		egen feEB`var'mean = mean(feEB`var') if dupli == 0
		tabstat feEB`var' if dupli == 0, statistics(p10 mean p90 n) save
		qui ret li
		mat roomtoimprove = r(StatTotal)
		matrix summ_qual[3,`c'] = roomtoimprove[3,1]- roomtoimprove[1,1]
		di "Delta \hat\mu_EB|p90-\hat\mu_EB|p10:" roomtoimprove[3,1]- roomtoimprove[1,1]
		* coefficient of variation
		matrix summ_qual[4,`c'] = summ_qual[2,`c']/summ_qual[1,`c']

		* attenuation
		summ att`var' if dupli == 0, detail
		matrix summ_qual[5,`c'] = r(mean)
		matrix summ_qual[6,`c'] = r(p50)
		* ndrivers
		summ feEB`var' if dupli == 0
		matrix summ_qual[7,`c'] = r(N)
		* should be the same
		assert `ndrivers'==r(N)
	local c = `c' + 1
}


putexcel set $paperpath/summ_qual.xlsx, replace
putexcel A1=matrix(summ_qual), names

************************************************************************
** [Table 2 - Panel A. Driving condition adjusted quality measures ]
************************************************************************
matlist summ_qual

* Wrong sd with measurement error
foreach var in $abcd {
	tabstat feEB`var' if dupli == 0, statistics(sd mean n)
}

* Wrong sd with measurement error
foreach var in $abcd {
	summ `var'_var if dupli == 0
	di sqrt(r(mean))	
}


* Wrong sd with measurement error
foreach var in $abcd {
	tabstat fe`var' if dupli == 0, statistics(sd mean n)
}

************************************************************************
** [Table 2 - Panel B. Correlation of quality metrics]
************************************************************************			
* correlation matrix - by condition [basic fixed effects]
matrix corr_cond = J(8,4,.)

	matrix colnames corr_cond = acceleratie rem bochten fuel
	matrix rownames corr_cond = acceleratie N rem N bochten N fuel N
	local r = 1
	foreach rowvar of varlist feacceleratie ferem febochten fefueleconomyLpKM {

		local c = 1
		foreach colvar of varlist feacceleratie ferem febochten fefueleconomyLpKM {
	
			if (`c'<=((`r'+1)/2)) {
				local cmd = "correl"

				* calculate correlation
				`cmd' `rowvar' `colvar' if dupli == 0 
				local rho = r(rho)
				local N = r(N)
				display "raw rho: `rho'"		
					if ("`rowvar'"=="`colvar'") {
					local rho = 1
				}

				matrix corr_cond[`r',`c'] = `rho' \ `N'
						
			}
	
			local c = `c' + 1
		}

		local r = `r' + 2
}

************************************************************************
** [Table 2 - Panel B. Correlation of quality metrics]
************************************************************************	
putexcel set $paperpath/corr_cond.xlsx, replace
putexcel A1=matrix(corr_cond), names
matlist corr_cond

	
* correlation matrix - by condition [EB-adjusted fixed effects]
matrix corr_condEB = J(8,4,.)

	matrix colnames corr_condEB = acceleratie rem bochten fuel
	matrix rownames corr_condEB = acceleratie N rem N bochten N fuel N
	local r = 1
	foreach rowvar of varlist feEBacceleratie feEBrem feEBbochten feEBfueleconomyLpKM {

		local c = 1
		foreach colvar of varlist feEBacceleratie feEBrem feEBbochten feEBfueleconomyLpKM {
	
			if (`c'<=((`r'+1)/2)) {
				local cmd = "correl"

				* calculate correlation
				`cmd' `rowvar' `colvar' if dupli == 0 
				local rho = r(rho)
				local N = r(N)
				display "raw rho: `rho'"		
				if ("`rowvar'"=="`colvar'") {
					local rho = 1
				}

				matrix corr_condEB[`r',`c'] = `rho' \ `N'
						
			}
	
			local c = `c' + 1
		}

		local r = `r' + 2
}

putexcel set $paperpath/corr_condEB.xlsx, replace
putexcel A1=matrix(corr_condEB), names

matlist corr_cond
matlist corr_condEB

	
** b. Residual correlation
corr residfeacceleratie residferem residfebochten residfefueleconomyLpKM  if selectie==1




******** Graphs: Driver fixed effects corrected for attenuation ********
************************************************************************
********* [Figure B.1: Driver Fixed Effect Estimates] ******************
************************************************************************
** Relation between uncorrected fixed effects and empirical Bayes estimates	[evaluated with average value outcome variable as midpoint]
	egen feMeanfueleconomyLpKM  = mean(feEBfueleconomyLpKM)
	summ fefueleconomyLpKMmeanv if dupli == 0
	gen fehulpfueleconomyLpKM   =  fefueleconomyLpKM - feMeanfueleconomyLpKM + r(mean)
	gen feEBhulpfueleconomyLpKM =  feEBfueleconomyLpKM - feMeanfueleconomyLpKM + r(mean)
	sort  feEBhulpfueleconomyLpKM 
	gen obsnrscatter = 100*_n/548786
	twoway (scatter fehulpfueleconomyLpKM  obsnrscatter, msize(tiny) ms(Oh))  (scatter feEBhulpfueleconomyLpKM  obsnrscatter, msize(tiny) ms(Dh)) if dupli == 0 & feEBfueleconomyLpKM~=., ///
		xtitle("Rank percentile") ytitle("fixed effect estimate") graphregion(fcolor(white) lcolor(white)) bgcolor(white) ///
	 legend(label(1 "Estimated {&mu}{subscript:i}") label(2 "Estimated {&mu}{subscript:i}{superscript: EB}")) ///
		name(fixedeffects_feEBfueleconomyLpKM, replace)
			graph display fixedeffects_feEBfueleconomyLpKM
			graph save "$paperpath\fixedeffects_feEBfueleconomyLpKM.png", replace
			graph export "$paperpath\fixedeffects_feEBfueleconomyLpKM.png", replace
	drop obsnrscatter 


local abc "acceleratie rem bochten"
foreach var of local abc {	
    egen feMean`var'  = mean(feEB`var')
	summ fe`var'meanv if dupli == 0
	gen fehulp`var'   =  fe`var' - feMean`var' + r(mean)
	gen feEBhulp`var' =  feEB`var' - feMean`var' + r(mean)
	sort  feEB`var'
	gen obsnrscatter = 100*_n/483370
	twoway (scatter fehulp`var' obsnrscatter, msize(tiny) ms(Oh))  (scatter feEBhulp`var'  obsnrscatter, msize(tiny) ms(Dh)) if dupli == 0 & feEB`var'~=., ///
		xtitle("Rank percentile") ytitle("fixed effect estimate") graphregion(fcolor(white) lcolor(white)) bgcolor(white) ///
	 legend(label(1 "Estimated {&mu}{subscript:i}") label(2 "Estimated {&mu}{subscript:i}{superscript: EB}")) ///
		name(fixedeffects_`var', replace)
			graph display fixedeffects_`var'
			graph save "$paperpath\fixedeffects_`var'.png", replace
			graph export "$paperpath\fixedeffects_`var'.png", replace
	drop obsnrscatter
	
}

		
************************************************************************
** [Table 2 - Panel A. Driving condition adjusted quality measures ]
************************************************************************
** Attenuation factor
************************************************************************
sum att* if dupli == 0
** due to many observations per driver attenuation factor is overall small

* fixed effects
sum fe*fuel* if dupli == 0
sum fe*accel* if dupli == 0
sum fe*bochten* if dupli == 0
sum fe*rem* if dupli == 0


* var() - variable to put underlying variance of the fe distribution
*         (conditional on the RHS vars)
* uvar() - variable to put unconditional underlying variance of the fe distribution
*         (underlying variance of the fe + variance of theta)
sum *var if dupli == 0


local abcd "acceleratie rem bochten fueleconomyLpKM"
foreach var of local abcd {
	* Compute rank driver on each dimension
	egen ferank`var'=rank(fe`var') if dupli == 0, unique
	sum ferank`var'
	replace ferank`var'=100*ferank`var'/r(max)

	egen feEBrank`var'=rank(feEB`var') if dupli == 0, unique
	sum feEBrank`var'
	replace feEBrank`var'=100*feEBrank`var'/r(max) if dupli == 0

}


* Correlations fixed effects
corr feEB*rank* if dupli == 0
corr ferank*  if dupli == 0


*************************************************************
** Some regressions on impact ext. factors on FE and ABC ****
*************************************************************
label variable Temp6_3C "Temp: -6 to -3 Celsius"
label variable Temp3_0C "-3 to 0 C"
label variable Temp0_3C "0 to 3 C"
label variable Temp3_6C "3 to 6 C"
label variable Temp9_12C "9 to 12 C"
label variable Temp12_15C "12 to 15 C" 
label variable Temp15_18C "15 to 18 C"
label variable Temp18_21C "18 to 21 C"
label variable Temp21_24C "21 to 24 C"
label variable Temp24_27C "24 to 27 C"
label variable Temp10_5C "Temp: -10 to +5 Celsius"
label variable Temp5_10C "5 to 10 C"
label variable Temp15_20C "15 to 20 C"
label variable Temp20_25C "20 to 25 C"
label variable Rain5_10mm "Rain: 5 to 10 mm"
label variable Rain0_5mm "Rain: 0 to 5 mm" 
label variable Rain5_50mm "Rain: 5 to 50 mm" 
label variable Rain10_15mm "10 to 15 mm"
label variable Rain15_20mm "15 to 20 mm"
label variable Rain20_30mm "20 to 30 mm"
label variable Rain30_40mm "30 to 40 mm"
label variable Wind0_2ms  "Wind: 0 to 2 m/s"
label variable Wind4_6ms  "4 to 6 m/s"
label variable Wind6_8ms  "6 to 8 m/s"
label variable Wind8_10ms "8 to 10 m/s"
label variable Wind10_20ms "10 to 20 m/s"
label variable Wind0_3ms "0 to 3 m/s"
label variable Wind5_8ms "5 to 8 m/s"
label variable Wind8_20ms "8 to 20 m/s"

label variable vdl10 "VDL 10m"
label variable vdl14 "VDL 14m" 
label variable iris10 "IRIS 10m"
label variable iris10cng "IRIS 10m - gas" 
label variable iris12 "IRIS 12m" 
label variable iris12cng "IRIS 10m - gas" 
label variable intouro  "INTOURO" 
label variable ochtendspits "rush hour (7-10am)"
label variable avondspits "rush hour (4-7pm)"
label variable uitleenrit "external trip"
label variable geplande_ritafstand "distance"
label variable aantal_haltes "no. of stops"
label variable lnovcheckins "ln(no. checkins)"
label variable punctuality "punctuality"


** Save EB-estimates and ranking per driver
drop if dupli == 1
keep chauf_nr_rug treatment fte coachdatum_1 coachdatum_2 coachdatum_3 coachdatum_4 coachdatum_5 regio eco_coach_nr_rug geslacht fulltimer T2 T3 T4 rndlocid ZH FR NobsPerDriverdep_abc NobsPerDriverdep_fuel selectie feacceleratie seacceleratie feacceleratiemeanv attacceleratie acceleratie_var acceleratie_uvar feEBacceleratie ferem serem feremmeanv attrem rem_var rem_uvar feEBrem febochten sebochten febochtenmeanv attbochten bochten_var bochten_uvar feEBbochten fefueleconomyLpKM sefueleconomyLpKM fefueleconomyLpKMmeanv attfueleconomyLpKM fueleconomyLpKM_var fueleconomyLpKM_uvar feEBfueleconomyLpKM feMeanfueleconomyLpKM fehulpfueleconomyLpKM  feEBhulpfueleconomyLpKM feEBacceleratiemean feEBremmean feEBfueleconomyLpKMmean feMeanacceleratie fehulpacceleratie feEBhulpacceleratie feMeanrem fehulprem feEBhulprem feMeanbochten fehulpbochten feEBhulpbochten ferankacceleratie feEBrankacceleratie ferankrem feEBrankrem ferankbochten feEBrankbochten ferankfueleconomyLpKM feEBrankfueleconomyLpKM dienstjaren dienstgroup age agegroup gendergroup


*! The following chauf_nr_rug are eco-coaches: 
* FR: "939 1404 519 1286 1610 531"
* ZH: "936 594 808 1297 1618 731 1402"

gen byte IsCoach = 0 
replace IsCoach  = 1 if (chauf_nr_rug == 939 |chauf_nr_rug == 1404 |chauf_nr_rug == 519|chauf_nr_rug == 1286 |chauf_nr_rug == 1610 |chauf_nr_rug == 531)
label variable IsCoach "= 1 if driver is eco-coach, 0 otherwise"
compress

save "$filepath\DEPO\EstimatedFixedEffectsDriverLevelDEPO.dta", replace

** How well do the coaches themselves do relative to the other drivers? 
tabstat feEBrank*, by(IsCoach)

tab rndloc chauf_nr_rug if IsCoach == 1

foreach var in $abcd {
tab feEBrank`var' chauf_nr_rug if IsCoach == 1

tab feEBrank`var' IsCoach  if rndloc == 13
tab feEBrank`var' IsCoach  if rndloc == 15
tab feEBrank`var' IsCoach  if rndloc == 16
tab feEBrank`var' IsCoach  if rndloc == 18

corr ferank`var' feEBrank`var' if IsCoach == 0 
}

**********************************************************************************************
**** [Figure D.3: Driver Specific Estimated EB-adjusted Fixed Effects and Time of Coaching]
***********************************************************************************************
** EB-adjusted fixed effect vs. coaching date 
local i = 1 
foreach var in $abcd {	
    if `i' < 4 { 
	    * [fixed effects evaluated with average value outcome variable as midpoint]
	   	graph twoway (lfit feEBhulp`var' coachdatum_1) (scatter feEBhulp`var' coachdatum_1, msize(tiny) ms(Oh)) if IsCoach == 0 & feEB`var'~=. & coachdatum_1 <=date("30-4-2016", "DMY"), xtitle("Date coached") ytitle("Estimated EB-adjusted driver fixed effect") graphregion(fcolor(white) lcolor(white)) bgcolor(white)  legend(off) 	tlabel(20362 20423 20485 20545, format(%tddmy)) ///
	name(FEEBDateCoached_`var', replace)
			graph display FEEBDateCoached_`var'
			graph save "$paperpath\FEEBDateCoached_`var'.png", replace
			graph export "$paperpath\FEEBDateCoached_`var'.png", replace		
	}	
	di "`i'"
	    else if `i' == 4 { 
		    * [fixed effects evaluated with average value outcome variable as midpoint]
	 	graph twoway (lfit feEBhulp`var' coachdatum_1) (scatter feEBhulp`var' coachdatum_1, msize(tiny) ms(Oh)) if IsCoach == 0 & feEB`var'~=. & coachdatum_1 <=date("30-4-2016", "DMY"), xtitle("Date coached") ytitle("Estimated EB-adjusted driver fixed effect") graphregion(fcolor(white) lcolor(white)) bgcolor(white)  legend(off) 	tlabel(20362 20423 20485 20545, format(%tddmy)) ///
	name(FEEBDateCoached_`var', replace)
			graph display FEEBDateCoached_`var'
			graph save "$paperpath\FEEBDateCoached_`var'.png", replace
			graph export "$paperpath\FEEBDateCoached_`var'.png", replace
			local  i = `i' + 1
			
	}
			reg feEB`var' coachdatum_1 if IsCoach == 0 & feEB`var'~=. & coachdatum_1 <=date("30-4-2016", "DMY") & coachdatum_1~=. 
	local  i = `i' + 1	
}


gen byte NeverCoached = 0
replace NeverCoached = 1 if coachdatum_1 ==.
** None of the differences is statistically significant:
foreach var in $abcd {
reg feEBrank`var' NeverCoached if IsCoach == 0
} 
tabstat feEBrank* if NeverCoached == 0 & IsCoach == 0, s(min mean p50 max)
tabstat feEBrank* if NeverCoached == 1 & IsCoach == 0, s(min mean p50 max)



* matrix mean fixed effects untreated drivers - by condition 


 
* Development fixed effects uncoached drivers over time. 	
matrix meanfeUntreated = J(72,4,.)
matrix colnames meanfeUntreated = acceleratie rem bochten fuel
matrix rownames meanfeUntreated = jan15 se feb15 se mar15 se apr15 se may15 se jun15 se jul15 se aug15 se sep15 se oct15 se nov15 se dec15 se jan16 se feb16 se mar16 se apr16 se may16 se jun16 se jul16 se aug16 se sep16 se oct16 se nov16 se dec16 se jan17 se feb17 se mar17 se apr17 se may17 se jun17 se jul17 se aug17 se sep17 se oct17 se nov17 se dec17 se
	
	
	forv r =1(2)71 {
		local c = 1
		foreach colvar of varlist feEBrankacceleratie feEBrankrem feEBrankbochten feEBrankfueleconomyLpKM {
	
			 local m = mod((`r'+1)/2, 12) + 1
			 local y = 2015 + floor(`r'/24)
				tabstat `colvar' if IsCoach == 0 & (coachdatum_1==. | coachdatum_1 >= date("1-`m'-`y'", "DMY") ), s(mean semean) save
				 ret li
				 mat total = r(StatTotal) 
				local gemiddelde  = round(total[1,1], 0.001)
				local segemiddelde = round(total[2,1], 0.001)
				local mmin1 = 12*(`m'==1) + `m' - 1
				display "mean `mmin1'-`y': `gemiddelde'  (`segemiddelde')"		
				matrix meanfeUntreated[`r',`c']   = `gemiddelde'
				matrix meanfeUntreated[`r'+1,`c'] = `segemiddelde'
			local c = `c' + 1				
			}
		}


putexcel set $paperpath/meanfeUntreated.xlsx, replace
putexcel A1=matrix(meanfeUntreated), names

matlist meanfeUntreated

 
* Development fixed effects coached drivers over time. 	
matrix meanfeTreated = J(72,4,.)
matrix colnames meanfeTreated = acceleratie rem bochten fuel
matrix rownames meanfeTreated = jan15 se feb15 se mar15 se apr15 se may15 se jun15 se jul15 se aug15 se sep15 se oct15 se nov15 se dec15 se jan16 se feb16 se mar16 se apr16 se may16 se jun16 se jul16 se aug16 se sep16 se oct16 se nov16 se dec16 se jan17 se feb17 se mar17 se apr17 se may17 se jun17 se jul17 se aug17 se sep17 se oct17 se nov17 se dec17 se
	
	
	forv r =1(2)71 {
		local c = 1
		foreach colvar of varlist feEBrankacceleratie feEBrankrem feEBrankbochten feEBrankfueleconomyLpKM {
	
			 local m = mod((`r'+1)/2, 12) + 1
			 local mmin1 = 12*(`m'==1) + `m' - 1
			 local y = 2015 + floor((`r'+1)/24)
			 local ymin1 = `y' - (`mmin1' == 12) 
			  summ `colvar' if IsCoach == 0 & (coachdatum_1~=. &  coachdatum_1 >= date("1-`mmin1'-`ymin1'", "DMY") & coachdatum_1 < date("1-`m'-`y'", "DMY") ), meanonly
				if r(N) > 0 {
				* calculate correlation
				tabstat `colvar' if IsCoach == 0 & (coachdatum_1~=. &  coachdatum_1 >= date("1-`mmin1'-`ymin1'", "DMY") & coachdatum_1 < date("1-`m'-`y'", "DMY") ), s(mean semean) save
				 ret li
				 mat total = r(StatTotal) 
				local gemiddelde  = round(total[1,1], 0.001)
				local segemiddelde = round(total[2,1], 0.001)
				display "mean `mmin1'-`ymin1': `gemiddelde'  (`segemiddelde')"		
				matrix meanfeTreated[`r',`c']   = `gemiddelde'
				matrix meanfeTreated[`r'+1,`c'] = `segemiddelde'
				}
			local c = `c' + 1				
			}
		}

putexcel set $paperpath/meanfeTreated.xlsx, replace
putexcel A1=matrix(meanfeTreated), names

matlist meanfeTreated



  
foreach var in $abcd {
	twoway scatter  feEBrank`var' rndlocid , msize(tiny) mc(blue) name(feEBrank`var'LOC, replace)
}

************************************************************************
** [Table 2 - Panel C. Rank of coaches]
************************************************************************
tabstat feEBrank* if IsCoach == 1, s(min mean p50 max)


log close
